import pandas as pda
import matplotlib.pylab as pyl
import jieba
from PIL import Image
import wordcloud as wc
import numpy as npy
import pymysql

# Load the first 1000 rows of the `hx` table into a DataFrame.
# NOTE(review): credentials are hard-coded here; consider reading them from
# the environment or a config file that is not checked in.
conn = pymysql.connect(host="127.0.0.1", user="root", password="123456", db="hexun")
sql = "select * from hx limit 0,1000"
try:
    data = pda.read_sql(sql, conn)
finally:
    # Release the database connection even when the query fails.
    conn.close()

# Transposed view: each row of `dt` is one column of the table.
dt = data.T
# Click counts (table column at position 3 — presumably hits; confirm schema).
x = dt.values[3].tolist()
# Comment counts (table column at position 4 — presumably comments; confirm schema).
y = dt.values[4].tolist()

# Scatter plot of clicks against comments for the unfiltered data.
pyl.plot(x, y, "o")
font = "SimHei"  # font name passed to matplotlib so the Chinese labels render
pyl.xlabel(u"点击量", fontproperties=font)
pyl.ylabel(u"评论数", fontproperties=font)
pyl.title('和讯博客【点击量-评论数】散点图', fontproperties=font)
pyl.show()

# Filter out outliers: keep records with fewer than 20000 clicks and at most
# 250 comments.
# The mask has one entry per record, so it must be applied to `data` (one row
# per record); applying it to the transposed frame fails with a length mismatch.
hits = data.iloc[:, 3]
comments = data.iloc[:, 4]
normal = data[(hits < 20000) & (comments <= 250)]
x = normal.iloc[:, 3].tolist()
y = normal.iloc[:, 4].tolist()

# Scatter plot of clicks against comments for the filtered data.
pyl.plot(x, y, "o")
font = "SimHei"  # font name passed to matplotlib so the Chinese labels render
pyl.xlabel(u"点击量", fontproperties=font)
pyl.ylabel(u"评论数", fontproperties=font)
pyl.title('和讯博客【点击量-评论数】散点图', fontproperties=font)
pyl.show()

# Save the outliers: every record rejected by the "normal" filter
# (clicks < 20000 and comments <= 250), i.e. the exact complement of that mask.
# The mask is applied to `data` (one row per record), not the transposed frame.
hits = data.iloc[:, 3]
comments = data.iloc[:, 4]
is_normal = (hits < 20000) & (comments <= 250)
error = data[~is_normal]
# NOTE(review): writing the legacy .xls format needs an engine that newer
# pandas releases may no longer provide — confirm, or switch to .xlsx.
with pda.ExcelWriter('./异常数据.xls') as writer:
    error.to_excel(writer, sheet_name="异常数据")

# Word cloud built from the text in the table column at position 2
# (presumably the post titles — confirm against the table schema).
data1 = dt.values[2].tolist()
# jieba.cut expects a single string, so merge the cells first; NULL cells are
# skipped so that the literal text "None"/"nan" does not end up in the cloud.
rawtext = " ".join(str(cell) for cell in data1 if pda.notna(cell))
cutdata = jieba.cut(rawtext)
# WordCloud.generate takes whitespace-separated tokens.
alldata = " ".join(cutdata)
font = r"C:\Windows\Fonts\simhei.ttf"
# Read the mask image into an array, then release the file handle.
with Image.open("e:/cat.png") as cat:
    catarray = npy.array(cat)
mywc = wc.WordCloud(
    collocations=False,
    font_path=font,
    mask=catarray,
    background_color="white",
).generate(alldata)
pyl.imshow(mywc)
pyl.show()

# Histogram of comment counts (`y` holds the per-record comment counts).
pyl.hist(y)
font = "SimHei"  # font name passed to matplotlib so the Chinese labels render
# x axis: the binned quantity (comment count); y axis: records per bin.
pyl.xlabel(u"评论数", fontproperties=font)
pyl.ylabel(u"频数", fontproperties=font)
pyl.title('和讯博客直方图分析', fontproperties=font)
pyl.show()

# Histogram of click counts (`x` holds the per-record click counts).
pyl.hist(x)
font = "SimHei"  # font name passed to matplotlib so the Chinese labels render
# x axis: the binned quantity (click count); y axis: records per bin.
pyl.xlabel(u"点击量", fontproperties=font)
pyl.ylabel(u"频数", fontproperties=font)
pyl.title('和讯博客直方图分析', fontproperties=font)
pyl.show()

